from sklearn.decomposition import  PCA
import pandas as pd;
# Load the four source tables.
prior = pd.read_csv("order_products_prior.csv")
# NOTE(review): the original path was "products" with no extension, unlike
# the other three inputs; assumed to be a typo for "products.csv" — confirm
# against the actual file name on disk.
products = pd.read_csv("products.csv")
orders = pd.read_csv("orders.csv")
aisles = pd.read_csv("aisles.csv")

# Merge the four tables into a single one so that every purchased-product row
# also carries the order's columns and the product's aisle (item category).
# Each join uses the single key shared by the two tables being merged.
_mg = pd.merge(prior, products, on='product_id')
_mg = pd.merge(_mg, orders, on='order_id')
# Join aisles on aisle_id only: the original also listed 'order_id' here,
# which is an orders key rather than an aisles key (assumes aisles.csv has no
# such column, in which case the old call raised KeyError).
mt = pd.merge(_mg, aisles, on='aisle_id')

# Peek at the first 10 rows of the merged table.
mt.head(10)

# Cross-tabulation: one row per user_id, one column per aisle, each cell
# holding how many merged rows fall into that (user, aisle) pair.
cross = pd.crosstab(index=mt['user_id'], columns=mt['aisle'])
cross.head(10)

# Principal component analysis. A float n_components between 0 and 1 asks
# scikit-learn to keep as many components as needed to explain that fraction
# of the variance (here 90%).
pca = PCA(n_components=0.9)
data = pca.fit_transform(cross)

# Show the reduced matrix followed by its shape, one per line.
print(data, data.shape, sep="\n")